---
title: 'Optimal Deposit Insurance: Direct Measurement'
author: "Eduardo Davila & Itay Goldstein"
date: "October 2022"
output:
  pdf_document: default
  html_document: default
---

```{r set_path, echo=FALSE, include=FALSE}


library("here")

# Anchor every relative path used below (e.g. "data_raw/...") at the project
# root reported by here::here().
project_root <- here::here()
print(project_root)

# knitr restores the working directory at the end of each chunk, so a bare
# setwd() here would not carry over to the chunks that read the data. When
# knitting, set knitr's root.dir instead; keep setwd() for interactive runs.
if (isTRUE(getOption("knitr.in.progress"))) {
  knitr::opts_knit$set(root.dir = project_root)
} else {
  setwd(project_root)
}
rm(project_root)

library("tidyverse")
library("foreign")
library("knitr")

```

# Import Data

WRDS CDS data. Quotes restricted to be: North America, United States, USD, senior and financials.

We filter the following banks: C BACF-BankNA BACORP JPM WB USB WFC BK STT GS MWD MER

| Firm                   |  Ticker     |
|------------------------|------------:|
| Bank of America NA     | BACF-BankNA |
| Bank of America Corp   | BACORP      |
| Bank of NY Mellon      | BK          |
| Citigroup Inc          | C           |
| Goldman Sachs          | GS          |
| JP Morgan Chase        | JPM         |
| Merrill Lynch & Co Inc | MER         |
| Morgan Stanley         | MWD         |
| State Street Corp      | STT         |
| U.S. Bancorp           | USB         |
| Wachovia Corp          | WB          |
| Wells Fargo & Co       | WFC         |

We then create implied default probabilities.

The corresponding ticker vector in R is `banks <- c("C", "BACF-BankNA", "BACORP", "JPM", "WB", "USB", "WFC", "BK", "STT", "GS", "MWD", "MER")`.

For Bank of America we only keep BoA Corp, which is the whole corporation; BoA NA also has poor-quality quotes. We also drop U.S. Bancorp because of its poor-quality quotes.

We also import data from FDIC failure statistics.

```{r}

# ---- CDS quotes (WRDS / Markit) ----
# Keep only the columns used downstream, add the implied default probability
# spread5y / (1 - recovery), and drop the BACF-BankNA and USB tickers.
cds <- read.dta("data_raw/wrds_markit_long.dta") %>%
  select(date, ticker, shortname, rating5y, spread5y, recovery) %>%
  mutate(implied_def_prob = spread5y / (1 - recovery)) %>%
  filter(!ticker %in% c("BACF-BankNA", "USB"))

# ---- FDIC ----
# Failures per year; the "Total" row is removed before converting to numbers.
failures <- read.csv("data_raw/failures_by_year.csv", header = TRUE) %>%
  select(year = Year, failures = Total.Institutions.) %>%
  filter(year != "Total") %>%
  mutate(
    year     = as.numeric(year),
    failures = as.numeric(failures)
  )

# Number of banks per year.
total <- read.csv("data_raw/summary_by_year.csv", header = TRUE) %>%
  select(year = YEAR, banks = BANKS) %>%
  mutate(
    year  = as.numeric(year),
    banks = as.numeric(banks)
  )

# One row per year of `total`, with that year's failures attached.
fdic <- total %>% left_join(failures, by = "year")

# Earliest and latest quote dates in the CDS sample (reported in the text).
first_date <- min(cds$date)
last_date  <- max(cds$date)

```
The first date in the dataset is `r first_date`.  
The last date in the dataset is `r last_date`.

# FDIC data

```{r}

# Annual failure rate: failed institutions divided by the number of banks.
fdic <- fdic %>% mutate(failure_rate = failures / banks)

# Average failure rate over all years, and over years up to and including 2007.
# Both are stored as plain numeric scalars (not 1x1 data frames) so the inline
# `r ...` expressions in the text get knitr's normal numeric formatting.
# NOTE(review): no na.rm here; any year with a missing `failures` or `banks`
# value makes the average NA -- confirm the input files cover every year.
avg_failure_rate <- mean(fdic$failure_rate)
avg_failure_rate_pre08 <- fdic %>%
  filter(year <= 2007) %>%
  pull(failure_rate) %>%
  mean()

```

The average implied probability of failure is `r avg_failure_rate*100`%.  
The average implied probability of failure is `r avg_failure_rate_pre08*100`% (using data through 2007).  

# Range of dates around 08 episode, before, and after

We focus on 2008-10-03.

```{r}

# Half-width, in days, of the window around the event date.
range <- 14

# Event date of the 2008 episode.
event_date <- as.Date("2008-10-03")

# Subsamples: +/- `range` days around the event, up to 2007-01-01,
# the first half of 2008, and 2012-01-01 through 2014-01-01.
# All bounds are built with as.Date() so every comparison is Date-to-Date.
df_08 <- cds %>%
  filter(date >= event_date - range, date <= event_date + range)
before_07 <- cds %>%
  filter(date <= as.Date("2007-01-01"))
early_08 <- cds %>%
  filter(date >= as.Date("2008-01-01"), date <= as.Date("2008-07-01"))
after_09 <- cds %>%
  filter(date >= as.Date("2012-01-01"), date <= as.Date("2014-01-01"))

# Mean 5-year spread in each subsample and in the full sample. Missing quotes
# are ignored in every mean (mean_after previously lacked na.rm = TRUE and
# would turn NA if a single quote in its window was missing).
# NOTE(review): these average `spread5y`, while the report text labels them
# "implied default rate" (implied_def_prob = spread5y / (1 - recovery)) --
# confirm which series is intended.
mean_before  <- mean(before_07$spread5y, na.rm = TRUE)
mean_after   <- mean(after_09$spread5y,  na.rm = TRUE)
mean_full    <- mean(cds$spread5y,       na.rm = TRUE)
mean_early08 <- mean(early_08$spread5y,  na.rm = TRUE)

```

The average implied default rate before 2007 is `r mean_before*100`%.  
The average implied default rate after 2009 (2012-01-01 to 2014-01-01) is `r mean_after*100`%.  
The average implied default rate early 2008 is `r mean_early08*100`%.  
The average implied default rate full       is `r mean_full*100`%.

# Calculations for 2008 episode

```{r}

# Per-ticker change in implied default probability between the earliest and
# the latest quote dated within [start_date, end_date].
#
# data:       data frame with columns `date`, `ticker`, `implied_def_prob`.
# start_date: first day of the window (Date, or string accepted by as.Date()).
# end_date:   last day of the window (Date, or string accepted by as.Date()).
#
# Returns one row per ticker with columns
#   dif    = after - before
#   before = implied_def_prob at the earliest date in the window
#   after  = implied_def_prob at the latest date in the window
#   ratio  = after / before - 1
# Quotes are ordered by `date` explicitly, so the result does not depend on
# the row order of `data`.
summarise_change <- function(data, start_date, end_date) {
  data %>%
    filter(date >= as.Date(start_date), date <= as.Date(end_date)) %>%
    group_by(ticker) %>%
    summarise(
      before = first(implied_def_prob, order_by = date),
      after  = last(implied_def_prob, order_by = date)
    ) %>%
    mutate(
      dif   = after - before,
      ratio = after / before - 1
    ) %>%
    select(ticker, dif, before, after, ratio)
}

# Change from October 2nd to October 3rd, 2008.
change_08 <- summarise_change(cds, "2008-10-02", "2008-10-03")

#change <- cds %>%  filter(date >= as.Date("2008-09-29") & date <= as.Date("2008-10-03"))

# Averages before, after, difference, ratio; the *_neg versions average only
# over tickers whose value is negative (i.e. the probability fell).
avg_ratio          <- mean(change_08$ratio)
avg_ratio_neg      <- mean(change_08$ratio[change_08$ratio < 0])
avg_difference     <- mean(change_08$dif)
avg_difference_neg <- mean(change_08$dif[change_08$dif < 0])
avg_before         <- mean(change_08$before)
avg_after          <- mean(change_08$after)

# Average change relative to the average starting level.
ratio_of_averages <- mean(change_08$dif) / mean(change_08$before)

# Large change afterwards: October 13th to October 14th, 2008.
change_08_next <- summarise_change(cds, "2008-10-13", "2008-10-14")

# Averages before, after, difference, ratio
avg_ratio_next          <- mean(change_08_next$ratio)
avg_ratio_neg_next      <- mean(change_08_next$ratio[change_08_next$ratio < 0])
avg_difference_next     <- mean(change_08_next$dif)
avg_difference_neg_next <- mean(change_08_next$dif[change_08_next$dif < 0])
avg_before_next         <- mean(change_08_next$before)
avg_after_next          <- mean(change_08_next$after)

ratio_of_averages_next <- mean(change_08_next$dif) / mean(change_08_next$before)

```

The average implied probability on October 2nd is `r avg_before*100`%.  
The average implied probability on October 3rd is `r avg_after*100`%.  
The average difference    in implied probabilities is `r avg_difference*100`%.  
The average difference    in implied probabilities among banks with dif<0 is `r avg_difference_neg*100`%.  
The average percent decrease in implied probabilities is `r avg_ratio*100`%.  
The average percent decrease in implied probabilities among banks with dif<0 is `r avg_ratio_neg*100`%.  

The percent change in the average implied probability (average difference divided by the average on October 2nd) is `r ratio_of_averages*100`%.
